#ifndef COMM_H_
#define COMM_H_
#include "esmd_types.h"
#include "cell.h"
#include <mpi.h>
#include "utils.hpp"

// Per-process state for the MPI communication layer.
struct mpp_t {
  // Process identity within `comm` (presumably rank and communicator size;
  // they are assigned outside this header -- confirm in comm_init).
  int pid, nproc;
  // Coordinates of this process in the process grid.
  vec<int> loc;
  // Number of processes along each axis of the process grid.
  vec<int> dim;
  // lbox: presumably the sub-box owned by this process (TODO confirm).
  box<real> lbox;
  // gbox: box spanning the whole process grid (see get_proc_range).
  box<real> gbox;
  // Per-axis extent of one process slab (see get_proc_range).
  vec<real> llen;
  // Neighbour identifiers in the next/previous direction, one per axis
  // (presumably ranks -- TODO confirm).
  vec<int> next;
  vec<int> prev;
  // Raw byte buffers for outgoing messages.
  char *send_prev;
  char *send_next;
  // Raw byte buffers for incoming messages.
  char *recv_prev;
  char *recv_next;
  // Request handles for the four point-to-point transfers.
  MPI_Request recv_req_prev;
  MPI_Request recv_req_next;
  MPI_Request send_req_prev;
  MPI_Request send_req_next;
  // Completion statuses matching the requests above.
  MPI_Status recv_stat_prev;
  MPI_Status recv_stat_next;
  MPI_Status send_stat_prev;
  MPI_Status send_stat_next;
  // Size bound for the communication buffers (presumably in bytes -- TODO
  // confirm against comm_init_buf).
  size_t max_comm_size;
  // Communicator used by the broadcast helpers below.
  MPI_Comm comm;

  // Broadcast `count` objects of type T starting at `dat` from `root` to every
  // process in `comm`. The objects are shipped as raw bytes.
  // NOTE(review): the byte count is narrowed from size_t to int before it is
  // handed to MPI_Bcast, so payloads above INT_MAX bytes are not representable.
  template<typename T>
  void bcast(T *dat, int count, int root=0){
    const int nbytes = static_cast<int>(count * sizeof(T));
    MPI_Bcast(dat, nbytes, MPI_BYTE, root, comm);
  }

  // Broadcast a single value from `root` and return the received copy.
  // The caller's argument is taken by value, so it is never modified.
  template<typename T>
  T bcast_var(T val, int root=0){
    T received = val;
    const int nbytes = static_cast<int>(sizeof(T));
    MPI_Bcast(&received, nbytes, MPI_BYTE, root, comm);
    return received;
  }
};

/*
 * get_proc_range(mpp, dir, i, lo, hi)
 *
 * Stores in `lo` and `hi` the bounds along axis `dir` of the i-th process
 * slab of the grid described by `mpp`.
 *
 *   mpp - pointer to a structure with gbox, llen and dim members
 *   dir - member name of the axis (e.g. x, y or z); pasted in as a token
 *   i   - slab index along that axis; evaluated more than once, so it must
 *         not have side effects
 *   lo  - lvalue receiving gbox.lo + i * llen
 *   hi  - lvalue receiving gbox.lo + (i + 1) * llen, except for the last slab
 *         (i == dim - 1), which is clamped to gbox.hi so that rounding in
 *         llen cannot leave a gap at the upper edge
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement and
 * requires a trailing semicolon at the call site.
 */
#define get_proc_range(mpp, dir, i, lo, hi)                        \
  do {                                                             \
    (lo) = (mpp)->gbox.lo.dir + (i) * (mpp)->llen.dir;             \
    if ((i) == (mpp)->dim.dir - 1) {                               \
      (hi) = (mpp)->gbox.hi.dir;                                   \
    } else {                                                       \
      (hi) = (mpp)->gbox.lo.dir + ((i) + 1) * (mpp)->llen.dir;     \
    }                                                              \
  } while (0)
/**
 * Linear index of the process located at offset (dx, dy, dz) from this
 * process in the process grid.
 *
 * The index is laid out with x as the slowest-varying axis and z as the
 * fastest: ((x * dim.y) + y) * dim.z + z.
 *
 * The offsets are added to this process's own coordinates on all three axes.
 * No wrapping or bounds checking is applied, so the caller must make sure the
 * target coordinates lie inside the grid.
 *
 * @param mpp  communication state providing `loc` and `dim`
 * @param dx   offset along x
 * @param dy   offset along y
 * @param dz   offset along z
 * @return     linear index of the target process
 */
INLINE int get_dproc(mpp_t *mpp, int dx, int dy, int dz) {
  const int tx = mpp->loc.x + dx;
  const int ty = mpp->loc.y + dy;
  // The z coordinate must include this process's own position, exactly like
  // x and y; otherwise the result is only correct when loc.z == 0.
  const int tz = mpp->loc.z + dz;
  return (tx * mpp->dim.y + ty) * mpp->dim.z + tz;
}
// Message tags for the prev/next neighbour exchange.
// The receive tags mirror the send tags of the opposite direction:
// rtag_prev equals stag_next and rtag_next equals stag_prev.
static constexpr int stag_prev = 0x3000;
static constexpr int stag_next = 0x3001;
static constexpr int rtag_prev = stag_next;
static constexpr int rtag_next = stag_prev;

//function signatures
// NOTE(review): the "function signatures" / "end function signatures" marker
// comments may be consumed by tooling -- keep them unchanged.

// Setup: initialise the communication state and its buffers.
void comm_init(mpp_t *mpp, MPI_Comm comm, box<real> * gbox);
void comm_init_buf(mpp_t *mpp, cellgrid_t *grid);

// Reductions. The "v" variants take a variable argument list after `mpp`;
// the "_stat" variants operate on an mdstat_t.
void comm_reduce(void *buf, int count, MPI_Datatype type, MPI_Op op, mpp_t *mpp);
void comm_allreduce(void *buf, int count, MPI_Datatype type, MPI_Op op, mpp_t *mpp);
void comm_vreduce(int count, MPI_Datatype type, MPI_Op op, mpp_t *mpp, ...);
void comm_vallreduce(int count, MPI_Datatype type, MPI_Op op, mpp_t *mpp, ...);
void comm_reduce_stat(mdstat_t *stat, mpp_t *mpp);
void comm_allreduce_stat(mdstat_t *stat, mpp_t *mpp);

// Every exchange variant below declares the same family of five functions:
//   pack_cell_* / unpack_cell_*   - take a buffer and one cell, return size_t
//   pack_brick_* / unpack_brick_* - take a buffer, the grid and x/y/z lo/hi
//                                   bounds, return size_t
//   forward_comm_* / reverse_comm_* - take the grid and the communication state
// The returned size_t is presumably the number of bytes handled -- confirm
// against the implementations.

// Variant: forward_most
size_t pack_cell_forward_most(void *buf, celldata_t *cell);
size_t unpack_cell_forward_most(void *buf, celldata_t *cell);
size_t pack_brick_forward_most(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_forward_most(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void forward_comm_most(cellgrid_t *grid, mpp_t *mpp);
// Variant: forward_x
size_t pack_cell_forward_x(void *buf, celldata_t *cell);
size_t unpack_cell_forward_x(void *buf, celldata_t *cell);
size_t pack_brick_forward_x(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_forward_x(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void forward_comm_x(cellgrid_t *grid, mpp_t *mpp);
// Variant: reverse_f
size_t pack_cell_reverse_f(void *buf, celldata_t *cell);
size_t unpack_cell_reverse_f(void *buf, celldata_t *cell);
size_t pack_brick_reverse_f(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_reverse_f(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void reverse_comm_f(cellgrid_t *grid, mpp_t *mpp);
// Variant: forward_v
size_t pack_cell_forward_v(void *buf, celldata_t *cell);
size_t unpack_cell_forward_v(void *buf, celldata_t *cell);
size_t pack_brick_forward_v(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_forward_v(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void forward_comm_v(cellgrid_t *grid, mpp_t *mpp);
// Variant: reverse_v
size_t pack_cell_reverse_v(void *buf, celldata_t *cell);
size_t unpack_cell_reverse_v(void *buf, celldata_t *cell);
size_t pack_brick_reverse_v(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_reverse_v(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void reverse_comm_v(cellgrid_t *grid, mpp_t *mpp);
// Variant: forward_export_list
size_t pack_cell_forward_export_list(void *buf, celldata_t *cell);
size_t unpack_cell_forward_export_list(void *buf, celldata_t *cell);
size_t pack_brick_forward_export_list(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_forward_export_list(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void forward_comm_export_list(cellgrid_t *grid, mpp_t *mpp);
// Variant: forward_export_list_cg
size_t pack_cell_forward_export_list_cg(void *buf, celldata_t *cell);
size_t unpack_cell_forward_export_list_cg(void *buf, celldata_t *cell);
size_t pack_brick_forward_export_list_cg(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_forward_export_list_cg(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void forward_comm_export_list_cg(cellgrid_t *grid, mpp_t *mpp);
// Variant: forward_shake
size_t pack_cell_forward_shake(void *buf, celldata_t *cell);
size_t unpack_cell_forward_shake(void *buf, celldata_t *cell);
size_t pack_brick_forward_shake(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_forward_shake(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void forward_comm_shake(cellgrid_t *grid, mpp_t *mpp);
// Variant: reverse_shake
size_t pack_cell_reverse_shake(void *buf, celldata_t *cell);
size_t unpack_cell_reverse_shake(void *buf, celldata_t *cell);
size_t pack_brick_reverse_shake(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
size_t unpack_brick_reverse_shake(void *buf, cellgrid_t *grid, int xlo, int xhi, int ylo, int yhi, int zlo, int zhi);
void reverse_comm_shake(cellgrid_t *grid, mpp_t *mpp);
//end function signatures
#endif
